Set up options

Install new packages

Load packages

Load data, remake useful variables

# Check the file path
here("nhanes3.rda")
## [1] "/Volumes/GoogleDrive/My Drive/Teaching/EPID674/EPID674_Week7_Class/nhanes3.rda"
# Load the saved R data
load(here("nhanes3.rda"))


# Remake a few variables from last class if they are no longer in your environment
sex1 <- factor(nhanes$sex, levels = c(1, 2), labels = c("male", "female"))
AGE5b <- cut(nhanes$age, quantile(nhanes$age, c(0, .2, .4, .6, .8, 1)), include.lowest = T) # quintiles
AGE5c <- cut(nhanes$age, breaks = c(19, 40, 50, 60, 70, 90))
age5c <- unclass(AGE5c)
nhanes <- cbind(nhanes, sex1, AGE5b, AGE5c, age5c)


# Clean up the dataset. Make sure NaN are NA for key covariates
table(nhanes$educ, useNA = "always")
## 
##    1    2    3  NaN <NA> 
## 2032 2349  660   33    0
nhanes$educ[is.nan(nhanes$educ)] <- NA
table(nhanes$educ, useNA = "always")
## 
##    1    2    3 <NA> 
## 2032 2349  660   33
table(nhanes$alc, useNA = "always")
## 
##    0    1  NaN <NA> 
## 2753 2189  132    0
nhanes$alc[is.nan(nhanes$alc)] <- NA
table(nhanes$alc, useNA = "always")
## 
##    0    1 <NA> 
## 2753 2189  132

7.1. Survival Analysis: Association between total mortality (d_total) and blood lead (bpb)

tab1(nhanes$d_total) # Variable for death due to any cause in NHANES

## nhanes$d_total : 
##         Frequency   %(NA+)   %(NA-)
## 0            3794     74.8     74.8
## 1            1275     25.1     25.2
## NaN             5      0.1      0.0
##   Total      5074    100.0    100.0
summ(nhanes$pmon_mec) # Number of months of follow up 

##  obs. mean   median  s.d.   min.   max.  
##  5069 158.7  170     49.481 0      217
### Define Surv() object to use in later functions
surv.total <- Surv(nhanes$pmon_mec, nhanes$d_total)
surv.total[1:10] # Includes information on time of follow up and whether the person died, denoted with a "+"
##  [1] 203+ 196+ 215+ 132  184+ 143  202+ 206+ 187+ 115
### K-M Life table and curve
fit.total <- survfit(Surv(nhanes$pmon_mec, nhanes$d_total) ~ 1) # Model with no predictors, just outcome
#summary(fit.total)
summary(nhanes$pmon_mec/12) # How many years of follow up do you have?
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    0.00   12.42   14.17   13.22   16.08   18.08       5
fit.total # How many people died over this time period?
## Call: survfit(formula = Surv(nhanes$pmon_mec, nhanes$d_total) ~ 1)
## 
##    5 observations deleted due to missingness 
##       n  events  median 0.95LCL 0.95UCL 
##    5069    1275      NA      NA      NA
plot(fit.total)

## suppress 95% CI lines and the time marks for censored subjects.
plot(fit.total, ylim = c(0.7, 1.0), conf.int = F, mark.time = F, ylab="Probability of survival", xlab="Months")

### Survival by different levels of covariates
fit.total.sex <- survfit(Surv(nhanes$pmon_mec, nhanes$d_total) ~ nhanes$sex)
fit.total.sex # How many people died in each sex group?
## Call: survfit(formula = Surv(nhanes$pmon_mec, nhanes$d_total) ~ nhanes$sex)
## 
##    5 observations deleted due to missingness 
##                 n events median 0.95LCL 0.95UCL
## nhanes$sex=1 2332    679     NA      NA      NA
## nhanes$sex=2 2737    596     NA      NA      NA
#summary(fit.total.sex)[1:10]

plot(fit.total.sex, ylim = c(0.6, 1.0), col = c("blue", "red"), lty = c(1, 2), mark.time = F, main = "Kaplan-Meier curve", xlab = "Time (months)", ylab = "Survival probability")
legend("bottomleft", legend = c("Men", "Women"), col = c("blue", "red"),  lty = c(1, 2))

### Test for differences in survival curves
survdiff(Surv(nhanes$pmon_mec, nhanes$d_total) ~ nhanes$sex) # Is there a difference in survival among males and females?
## Call:
## survdiff(formula = Surv(nhanes$pmon_mec, nhanes$d_total) ~ nhanes$sex)
## 
## n=5069, 5 observations deleted due to missingness.
## 
##                 N Observed Expected (O-E)^2/E (O-E)^2/V
## nhanes$sex=1 2332      679      573      19.7      35.8
## nhanes$sex=2 2737      596      702      16.1      35.8
## 
##  Chisq= 35.8  on 1 degrees of freedom, p= 2e-09

Cox regression

cox.bpb <- coxph(Surv(nhanes$pmon_mec, nhanes$d_total) ~ nhanes$bpb)
summary(cox.bpb) # Is blood Pb associated with survival? Easier to visualize in categories of exposure
## Call:
## coxph(formula = Surv(nhanes$pmon_mec, nhanes$d_total) ~ nhanes$bpb)
## 
##   n= 5069, number of events= 1275 
##    (5 observations deleted due to missingness)
## 
##                coef exp(coef) se(coef)     z Pr(>|z|)    
## nhanes$bpb 0.070662  1.073219 0.005597 12.63   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##            exp(coef) exp(-coef) lower .95 upper .95
## nhanes$bpb     1.073     0.9318     1.062     1.085
## 
## Concordance= 0.62  (se = 0.008 )
## Likelihood ratio test= 113.3  on 1 df,   p=<2e-16
## Wald test            = 159.4  on 1 df,   p=<2e-16
## Score (logrank) test = 153.9  on 1 df,   p=<2e-16
bpb3 <- cut2(nhanes$bpb, g = 3)
tab1(bpb3) # Tertiles of blood Pb

## bpb3 : 
##            Frequency Percent Cum. percent
## [0.7, 2.4)      1760    34.7         34.7
## [2.4, 4.4)      1672    33.0         67.6
## [4.4,48.1]      1642    32.4        100.0
##   Total         5074   100.0        100.0
# K-M Life table and curve
fit.total.bpb3 <- survfit(Surv(nhanes$pmon_mec, nhanes$d_total) ~ factor(bpb3))
fit.total.bpb3
## Call: survfit(formula = Surv(nhanes$pmon_mec, nhanes$d_total) ~ factor(bpb3))
## 
##    5 observations deleted due to missingness 
##                            n events median 0.95LCL 0.95UCL
## factor(bpb3)=[0.7, 2.4) 1758    248     NA      NA      NA
## factor(bpb3)=[2.4, 4.4) 1670    451     NA      NA      NA
## factor(bpb3)=[4.4,48.1] 1641    576     NA      NA      NA
plot(fit.total.bpb3, col = c(1:3), lty = c(1:3), ylim = c(0.6, 1.0), main = "Survival in relation to blood lead levels", xlab = "Time (months)", ylab = "Survival probability") 
legend(30, 0.7, legend = c("Q1", "Q2", "Q3"), lty = c(1:3), col = c(1:3))

# crude
cox.bpb3 <- coxph(Surv(nhanes$pmon_mec, nhanes$d_total) ~ factor(bpb3))
summary(cox.bpb3)
## Call:
## coxph(formula = Surv(nhanes$pmon_mec, nhanes$d_total) ~ factor(bpb3))
## 
##   n= 5069, number of events= 1275 
##    (5 observations deleted due to missingness)
## 
##                           coef exp(coef) se(coef)      z Pr(>|z|)    
## factor(bpb3)[2.4, 4.4) 0.70388   2.02158  0.07906  8.903   <2e-16 ***
## factor(bpb3)[4.4,48.1] 1.00452   2.73061  0.07599 13.219   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                        exp(coef) exp(-coef) lower .95 upper .95
## factor(bpb3)[2.4, 4.4)     2.022     0.4947     1.731     2.360
## factor(bpb3)[4.4,48.1]     2.731     0.3662     2.353     3.169
## 
## Concordance= 0.605  (se = 0.007 )
## Likelihood ratio test= 196.5  on 2 df,   p=<2e-16
## Wald test            = 174.7  on 2 df,   p=<2e-16
## Score (logrank) test = 186.5  on 2 df,   p=<2e-16
# adjusted
cox.bpb3.adj <- coxph(Surv(nhanes$pmon_mec, nhanes$d_total) ~ bpb3 + nhanes$age + factor(nhanes$sex) + factor(nhanes$race) + factor(nhanes$educ) + factor(nhanes$smk) + factor(nhanes$alc))
summary(cox.bpb3.adj)
## Call:
## coxph(formula = Surv(nhanes$pmon_mec, nhanes$d_total) ~ bpb3 + 
##     nhanes$age + factor(nhanes$sex) + factor(nhanes$race) + factor(nhanes$educ) + 
##     factor(nhanes$smk) + factor(nhanes$alc))
## 
##   n= 4905, number of events= 1211 
##    (169 observations deleted due to missingness)
## 
##                           coef exp(coef)  se(coef)      z Pr(>|z|)    
## bpb3[2.4, 4.4)        0.069004  1.071440  0.081665  0.845  0.39813    
## bpb3[4.4,48.1]        0.084549  1.088226  0.083037  1.018  0.30858    
## nhanes$age            0.088495  1.092529  0.002373 37.287  < 2e-16 ***
## factor(nhanes$sex)2  -0.294962  0.744560  0.065259 -4.520 6.19e-06 ***
## factor(nhanes$race)2  0.178596  1.195537  0.070922  2.518  0.01180 *  
## factor(nhanes$educ)2  0.001651  1.001652  0.062588  0.026  0.97895    
## factor(nhanes$educ)3 -0.205472  0.814263  0.102174 -2.011  0.04432 *  
## factor(nhanes$smk)2   0.171628  1.187237  0.070372  2.439  0.01473 *  
## factor(nhanes$smk)3   0.631970  1.881314  0.082193  7.689 1.48e-14 ***
## factor(nhanes$alc)1  -0.182403  0.833265  0.067767 -2.692  0.00711 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                      exp(coef) exp(-coef) lower .95 upper .95
## bpb3[2.4, 4.4)          1.0714     0.9333    0.9130    1.2574
## bpb3[4.4,48.1]          1.0882     0.9189    0.9248    1.2806
## nhanes$age              1.0925     0.9153    1.0875    1.0976
## factor(nhanes$sex)2     0.7446     1.3431    0.6552    0.8462
## factor(nhanes$race)2    1.1955     0.8364    1.0404    1.3738
## factor(nhanes$educ)2    1.0017     0.9984    0.8860    1.1324
## factor(nhanes$educ)3    0.8143     1.2281    0.6665    0.9948
## factor(nhanes$smk)2     1.1872     0.8423    1.0343    1.3628
## factor(nhanes$smk)3     1.8813     0.5315    1.6014    2.2102
## factor(nhanes$alc)1     0.8333     1.2001    0.7296    0.9516
## 
## Concordance= 0.857  (se = 0.005 )
## Likelihood ratio test= 2351  on 10 df,   p=<2e-16
## Wald test            = 1629  on 10 df,   p=<2e-16
## Score (logrank) test = 2328  on 10 df,   p=<2e-16
### Test for the proportional hazards assumption
test.prop <- cox.zph(cox.bpb3.adj) # Do any of the variables violate the proportional hazards assumption? May consider stratifying by sex.
test.prop 
##                      chisq df     p
## bpb3                0.7663  2 0.682
## nhanes$age          1.3879  1 0.239
## factor(nhanes$sex)  4.4387  1 0.035
## factor(nhanes$race) 0.3733  1 0.541
## factor(nhanes$educ) 0.1618  2 0.922
## factor(nhanes$smk)  0.4093  2 0.815
## factor(nhanes$alc)  0.0235  1 0.878
## GLOBAL              8.0277 10 0.626
## Display a graph of the scaled Schoenfeld residuals, along with a smooth curve
plot(test.prop) # for all variables

plot(test.prop, var = 4) + # Can call up variables indivdually, here for sex
abline(h = 0, lty = 3, col = 2)

## integer(0)
# Stratify by sex
cox.bpb3.adj1 <- coxph(Surv(nhanes$pmon_mec, nhanes$d_total) ~ bpb3 + nhanes$age + strata(nhanes$sex) + factor(nhanes$race) + factor(nhanes$educ) + factor(nhanes$smk) + factor(nhanes$alc))
summary(cox.bpb3.adj1)
## Call:
## coxph(formula = Surv(nhanes$pmon_mec, nhanes$d_total) ~ bpb3 + 
##     nhanes$age + strata(nhanes$sex) + factor(nhanes$race) + factor(nhanes$educ) + 
##     factor(nhanes$smk) + factor(nhanes$alc))
## 
##   n= 4905, number of events= 1211 
##    (169 observations deleted due to missingness)
## 
##                           coef exp(coef)  se(coef)      z Pr(>|z|)    
## bpb3[2.4, 4.4)        0.069557  1.072033  0.081702  0.851  0.39458    
## bpb3[4.4,48.1]        0.088475  1.092506  0.083082  1.065  0.28692    
## nhanes$age            0.088456  1.092486  0.002374 37.260  < 2e-16 ***
## factor(nhanes$race)2  0.178662  1.195616  0.070942  2.518  0.01179 *  
## factor(nhanes$educ)2  0.001237  1.001238  0.062599  0.020  0.98424    
## factor(nhanes$educ)3 -0.209755  0.810783  0.102180 -2.053  0.04009 *  
## factor(nhanes$smk)2   0.166505  1.181170  0.070394  2.365  0.01801 *  
## factor(nhanes$smk)3   0.631710  1.880825  0.082187  7.686 1.51e-14 ***
## factor(nhanes$alc)1  -0.179672  0.835545  0.067736 -2.653  0.00799 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                      exp(coef) exp(-coef) lower .95 upper .95
## bpb3[2.4, 4.4)          1.0720     0.9328    0.9134    1.2582
## bpb3[4.4,48.1]          1.0925     0.9153    0.9283    1.2857
## nhanes$age              1.0925     0.9153    1.0874    1.0976
## factor(nhanes$race)2    1.1956     0.8364    1.0404    1.3740
## factor(nhanes$educ)2    1.0012     0.9988    0.8856    1.1319
## factor(nhanes$educ)3    0.8108     1.2334    0.6636    0.9906
## factor(nhanes$smk)2     1.1812     0.8466    1.0289    1.3559
## factor(nhanes$smk)3     1.8808     0.5317    1.6010    2.2096
## factor(nhanes$alc)1     0.8355     1.1968    0.7317    0.9542
## 
## Concordance= 0.855  (se = 0.005 )
## Likelihood ratio test= 2315  on 9 df,   p=<2e-16
## Wald test            = 1599  on 9 df,   p=<2e-16
## Score (logrank) test = 2294  on 9 df,   p=<2e-16
cox.zph(cox.bpb3.adj1)
##                     chisq df    p
## bpb3                0.443  2 0.80
## nhanes$age          1.321  1 0.25
## factor(nhanes$race) 0.404  1 0.53
## factor(nhanes$educ) 0.156  2 0.93
## factor(nhanes$smk)  1.038  2 0.60
## factor(nhanes$alc)  0.476  1 0.49
## GLOBAL              3.675  9 0.93

Optional: Exercise 7A

Not a graded assignment.

7.2. Linear and nonlinear mixed effects models with the nlme package

Identify and understand your clustering variable (random effect)

##Distribution of primary sampling unit (psu) and cluster (strata)
tab1(nhanes$psu)        # 2 PSU

## nhanes$psu : 
##         Frequency Percent Cum. percent
## 1            2481    48.9         48.9
## 2            2593    51.1        100.0
##   Total      5074   100.0        100.0
tab1(nhanes$strata) # 49 strata

## nhanes$strata : 
##         Frequency Percent Cum. percent
## 1             102     2.0          2.0
## 2             116     2.3          4.3
## 3             113     2.2          6.5
## 4              90     1.8          8.3
## 5              89     1.8         10.1
## 6              99     2.0         12.0
## 7             124     2.4         14.4
## 8             120     2.4         16.8
## 9             105     2.1         18.9
## 10            121     2.4         21.3
## 11            115     2.3         23.5
## 12            103     2.0         25.6
## 13            110     2.2         27.7
## 14            125     2.5         30.2
## 15            127     2.5         32.7
## 16            141     2.8         35.5
## 17            143     2.8         38.3
## 18            126     2.5         40.8
## 19            134     2.6         43.4
## 20            119     2.3         45.8
## 21            106     2.1         47.9
## 22            128     2.5         50.4
## 23            114     2.2         52.6
## 24            116     2.3         54.9
## 25            112     2.2         57.1
## 26            120     2.4         59.5
## 27            128     2.5         62.0
## 28            116     2.3         64.3
## 29            127     2.5         66.8
## 30            112     2.2         69.0
## 31            132     2.6         71.6
## 32            103     2.0         73.6
## 33            127     2.5         76.1
## 34            122     2.4         78.5
## 35             66     1.3         79.8
## 36             53     1.0         80.9
## 37             37     0.7         81.6
## 38             46     0.9         82.5
## 39             86     1.7         84.2
## 40             57     1.1         85.3
## 41             70     1.4         86.7
## 42             44     0.9         87.6
## 43             99     2.0         89.5
## 44            111     2.2         91.7
## 45             79     1.6         93.3
## 46             57     1.1         94.4
## 47             51     1.0         95.4
## 48            148     2.9         98.3
## 49             85     1.7        100.0
##   Total      5074   100.0        100.0
nhanes$locode<-ifelse(nhanes$psu==1, nhanes$strata, nhanes$strata+49)
tab1(nhanes$locode)

## nhanes$locode : 
##         Frequency Percent Cum. percent
## 1              50     1.0          1.0
## 2              49     1.0          2.0
## 3              55     1.1          3.0
## 4              45     0.9          3.9
## 5              49     1.0          4.9
## 6              50     1.0          5.9
## 7              59     1.2          7.0
## 8              59     1.2          8.2
## 9              52     1.0          9.2
## 10             65     1.3         10.5
## 11             55     1.1         11.6
## 12             45     0.9         12.5
## 13             54     1.1         13.5
## 14             67     1.3         14.9
## 15             61     1.2         16.1
## 16             69     1.4         17.4
## 17             68     1.3         18.8
## 18             60     1.2         19.9
## 19             59     1.2         21.1
## 20             54     1.1         22.2
## 21             55     1.1         23.3
## 22             57     1.1         24.4
## 23             70     1.4         25.8
## 24             59     1.2         26.9
## 25             50     1.0         27.9
## 26             49     1.0         28.9
## 27             76     1.5         30.4
## 28             58     1.1         31.5
## 29             60     1.2         32.7
## 30             50     1.0         33.7
## 31             60     1.2         34.9
## 32             52     1.0         35.9
## 33             64     1.3         37.2
## 34             55     1.1         38.2
## 35             33     0.7         38.9
## 36             26     0.5         39.4
## 37             19     0.4         39.8
## 38             21     0.4         40.2
## 39             48     0.9         41.1
## 40             31     0.6         41.7
## 41             25     0.5         42.2
## 42             21     0.4         42.6
## 43             52     1.0         43.7
## 44             48     0.9         44.6
## 45             40     0.8         45.4
## 46             31     0.6         46.0
## 47             24     0.5         46.5
## 48             83     1.6         48.1
## 49             39     0.8         48.9
## 50             52     1.0         49.9
## 51             67     1.3         51.2
## 52             58     1.1         52.4
## 53             45     0.9         53.3
## 54             40     0.8         54.1
## 55             49     1.0         55.0
## 56             65     1.3         56.3
## 57             61     1.2         57.5
## 58             53     1.0         58.6
## 59             56     1.1         59.7
## 60             60     1.2         60.8
## 61             58     1.1         62.0
## 62             56     1.1         63.1
## 63             58     1.1         64.2
## 64             66     1.3         65.5
## 65             72     1.4         66.9
## 66             75     1.5         68.4
## 67             66     1.3         69.7
## 68             75     1.5         71.2
## 69             65     1.3         72.5
## 70             51     1.0         73.5
## 71             71     1.4         74.9
## 72             44     0.9         75.8
## 73             57     1.1         76.9
## 74             62     1.2         78.1
## 75             71     1.4         79.5
## 76             52     1.0         80.5
## 77             58     1.1         81.7
## 78             67     1.3         83.0
## 79             62     1.2         84.2
## 80             72     1.4         85.6
## 81             51     1.0         86.6
## 82             63     1.2         87.9
## 83             67     1.3         89.2
## 84             33     0.7         89.9
## 85             27     0.5         90.4
## 86             18     0.4         90.7
## 87             25     0.5         91.2
## 88             38     0.7         92.0
## 89             26     0.5         92.5
## 90             45     0.9         93.4
## 91             23     0.5         93.8
## 92             47     0.9         94.8
## 93             63     1.2         96.0
## 94             39     0.8         96.8
## 95             26     0.5         97.3
## 96             27     0.5         97.8
## 97             65     1.3         99.1
## 98             46     0.9        100.0
##   Total      5074   100.0        100.0
table(nhanes$psu, nhanes$locode)
##    
##      1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
##   1 50 49 55 45 49 50 59 59 52 65 55 45 54 67 61 69 68 60 59 54 55 57 70 59 50
##   2  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##    
##     26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
##   1 49 76 58 60 50 60 52 64 55 33 26 19 21 48 31 25 21 52 48 40 31 24 83 39  0
##   2  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0 52
##    
##     51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
##   1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   2 67 58 45 40 49 65 61 53 56 60 58 56 58 66 72 75 66 75 65 51 71 44 57 62 71
##    
##     76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98
##   1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   2 52 58 67 62 72 51 63 67 33 27 18 25 38 26 45 23 47 63 39 26 27 65 46

Perform mixed effects models with random intercepts and random slopes

###Random intercepts only model for SBP
sbp.lme <- lme(fixed = log(sbp) ~ log(bpb) + age + I(age^2) + bmi + factor(race) + factor(sex) + factor(smk) + factor(educ) + factor(alc), random=~1|locode, na.action=na.omit, data=nhanes)
summary(sbp.lme)
## Linear mixed-effects model fit by REML
##   Data: nhanes 
##         AIC       BIC   logLik
##   -7179.826 -7088.897 3603.913
## 
## Random effects:
##  Formula: ~1 | locode
##         (Intercept)  Residual
## StdDev:  0.01497941 0.1138104
## 
## Fixed effects:  log(sbp) ~ log(bpb) + age + I(age^2) + bmi + factor(race) + factor(sex) +      factor(smk) + factor(educ) + factor(alc) 
##                   Value   Std.Error   DF  t-value p-value
## (Intercept)    4.504171 0.014086651 4793 319.7474  0.0000
## log(bpb)       0.012362 0.002685955 4793   4.6025  0.0000
## age            0.003033 0.000537513 4793   5.6418  0.0000
## I(age^2)       0.000016 0.000005123 4793   3.1197  0.0018
## bmi            0.004815 0.000294457 4793  16.3508  0.0000
## factor(race)2  0.013063 0.004054943 4793   3.2216  0.0013
## factor(sex)2  -0.032211 0.003780680 4793  -8.5200  0.0000
## factor(smk)2  -0.014085 0.004267112 4793  -3.3009  0.0010
## factor(smk)3  -0.000873 0.004340996 4793  -0.2012  0.8406
## factor(educ)2  0.004741 0.003721299 4793   1.2740  0.2027
## factor(educ)3 -0.014662 0.005529777 4793  -2.6515  0.0080
## factor(alc)1   0.005850 0.003696056 4793   1.5827  0.1136
##  Correlation: 
##               (Intr) lg(bp) age    I(g^2) bmi    fctr(r)2 fctr(sx)2 fctr(sm)2
## log(bpb)      -0.124                                                         
## age           -0.693 -0.107                                                  
## I(age^2)       0.647  0.052 -0.983                                           
## bmi           -0.400  0.034 -0.221  0.222                                    
## factor(race)2 -0.041 -0.112 -0.007  0.037 -0.074                             
## factor(sex)2  -0.183  0.338 -0.059  0.050 -0.028 -0.029                      
## factor(smk)2   0.051 -0.011 -0.154  0.124 -0.024  0.054    0.194             
## factor(smk)3  -0.044 -0.184 -0.115  0.136  0.105 -0.031    0.056     0.345   
## factor(educ)2 -0.180  0.137 -0.025  0.045  0.049 -0.053   -0.034    -0.006   
## factor(educ)3 -0.109  0.167 -0.107  0.110  0.088  0.027    0.063     0.051   
## factor(alc)1  -0.188 -0.087  0.016  0.029  0.052  0.087    0.222    -0.070   
##               fctr(s)3 fctr(d)2 fctr(d)3
## log(bpb)                                
## age                                     
## I(age^2)                                
## bmi                                     
## factor(race)2                           
## factor(sex)2                            
## factor(smk)2                            
## factor(smk)3                            
## factor(educ)2  0.039                    
## factor(educ)3  0.131    0.415           
## factor(alc)1  -0.149   -0.111   -0.131  
## 
## Standardized Within-Group Residuals:
##         Min          Q1         Med          Q3         Max 
## -4.63615371 -0.64264248 -0.05180877  0.57432592  5.10209571 
## 
## Number of Observations: 4902
## Number of Groups: 98
##Return variance-covariance matrix for random variables
getVarCov(sbp.lme)
## Random effects variance covariance matrix
##             (Intercept)
## (Intercept)  0.00022438
##   Standard Deviations: 0.014979
##Random Intercepts and slopes model
sbp.lme2<-lme(fixed = log(sbp) ~ log(bpb) + age + I(age^2) + bmi + factor(race) + factor(sex) + factor(smk) + factor(educ) + factor(alc), random=~1+log(bpb)|locode, na.action=na.omit, data=nhanes)
summary(sbp.lme2)
## Linear mixed-effects model fit by REML
##   Data: nhanes 
##         AIC       BIC   logLik
##   -7175.826 -7071.907 3603.913
## 
## Random effects:
##  Formula: ~1 + log(bpb) | locode
##  Structure: General positive-definite, Log-Cholesky parametrization
##             StdDev       Corr  
## (Intercept) 1.497941e-02 (Intr)
## log(bpb)    1.048939e-05 -0.001
## Residual    1.138104e-01       
## 
## Fixed effects:  log(sbp) ~ log(bpb) + age + I(age^2) + bmi + factor(race) + factor(sex) +      factor(smk) + factor(educ) + factor(alc) 
##                   Value   Std.Error   DF  t-value p-value
## (Intercept)    4.504171 0.014086651 4793 319.7474  0.0000
## log(bpb)       0.012362 0.002685955 4793   4.6025  0.0000
## age            0.003033 0.000537513 4793   5.6418  0.0000
## I(age^2)       0.000016 0.000005123 4793   3.1197  0.0018
## bmi            0.004815 0.000294457 4793  16.3508  0.0000
## factor(race)2  0.013063 0.004054943 4793   3.2216  0.0013
## factor(sex)2  -0.032211 0.003780680 4793  -8.5200  0.0000
## factor(smk)2  -0.014085 0.004267112 4793  -3.3009  0.0010
## factor(smk)3  -0.000873 0.004340996 4793  -0.2012  0.8406
## factor(educ)2  0.004741 0.003721299 4793   1.2740  0.2027
## factor(educ)3 -0.014662 0.005529777 4793  -2.6515  0.0080
## factor(alc)1   0.005850 0.003696056 4793   1.5827  0.1136
##  Correlation: 
##               (Intr) lg(bp) age    I(g^2) bmi    fctr(r)2 fctr(sx)2 fctr(sm)2
## log(bpb)      -0.124                                                         
## age           -0.693 -0.107                                                  
## I(age^2)       0.647  0.052 -0.983                                           
## bmi           -0.400  0.034 -0.221  0.222                                    
## factor(race)2 -0.041 -0.112 -0.007  0.037 -0.074                             
## factor(sex)2  -0.183  0.338 -0.059  0.050 -0.028 -0.029                      
## factor(smk)2   0.051 -0.011 -0.154  0.124 -0.024  0.054    0.194             
## factor(smk)3  -0.044 -0.184 -0.115  0.136  0.105 -0.031    0.056     0.345   
## factor(educ)2 -0.180  0.137 -0.025  0.045  0.049 -0.053   -0.034    -0.006   
## factor(educ)3 -0.109  0.167 -0.107  0.110  0.088  0.027    0.063     0.051   
## factor(alc)1  -0.188 -0.087  0.016  0.029  0.052  0.087    0.222    -0.070   
##               fctr(s)3 fctr(d)2 fctr(d)3
## log(bpb)                                
## age                                     
## I(age^2)                                
## bmi                                     
## factor(race)2                           
## factor(sex)2                            
## factor(smk)2                            
## factor(smk)3                            
## factor(educ)2  0.039                    
## factor(educ)3  0.131    0.415           
## factor(alc)1  -0.149   -0.111   -0.131  
## 
## Standardized Within-Group Residuals:
##         Min          Q1         Med          Q3         Max 
## -4.63615371 -0.64264247 -0.05180877  0.57432589  5.10209563 
## 
## Number of Observations: 4902
## Number of Groups: 98
getVarCov(sbp.lme2)
## Random effects variance covariance matrix
##             (Intercept)    log(bpb)
## (Intercept)  2.2438e-04 -8.3127e-11
## log(bpb)    -8.3127e-11  1.1003e-10
##   Standard Deviations: 0.014979 1.0489e-05
## compare these two models
anova(sbp.lme, sbp.lme2)
##          Model df       AIC       BIC   logLik   Test      L.Ratio p-value
## sbp.lme      1 14 -7179.826 -7088.897 3603.913                            
## sbp.lme2     2 16 -7175.826 -7071.907 3603.913 1 vs 2 2.225843e-06       1

7.3. Analysis of complex survey samples with the survey package

Build a survey design object

#### first specify the design effect
bpdsn<-svydesign(id=~psu, strata=~strata, weights=~wt_mh, data=nhanes, nest=T)
bpdsn
## Stratified 1 - level Cluster Sampling design (with replacement)
## With (98) clusters.
## svydesign(id = ~psu, strata = ~strata, weights = ~wt_mh, data = nhanes, 
##     nest = T)
#nest=T: relabel cluster ids to enforce nesting within strata

Descriptive statistics with survey weighting

#### Descriptive analyses
svymean(~ nhanes$age, design=bpdsn)
##              mean     SE
## nhanes$age 45.041 0.5403
summ(nhanes$age)

##  obs. mean   median  s.d.   min.   max.  
##  5074 48.744 46      19.297 20     90
svymean(~ nhanes$age + nhanes$sbp + nhanes$bpb, design=bpdsn)
##               mean     SE
## nhanes$age  45.041 0.5403
## nhanes$sbp 122.656 0.4443
## nhanes$bpb   3.463 0.0958
svymean(~ nhanes$sbp + nhanes$bpb + nhanes$bmi, design=bpdsn)
##               mean  SE
## nhanes$sbp 122.656 Inf
## nhanes$bpb   3.463 Inf
## nhanes$bmi     NaN NaN
svymean(~ nhanes$sbp + nhanes$bpb + nhanes$bmi, design=bpdsn, na.rm=T)
##                mean     SE
## nhanes$sbp 122.6401 0.4437
## nhanes$bpb   3.4614 0.0957
## nhanes$bmi  26.5947 0.1322
svytable(~ nhanes$sex, design=bpdsn)
## nhanes$sex
##        1        2 
## 25097913 27754405
svytable(~ nhanes$sex, Ntotal=100, design=bpdsn)
## nhanes$sex
##        1        2 
## 47.48687 52.51313
svytable(~ nhanes$sex + nhanes$race, design=bpdsn)
##           nhanes$race
## nhanes$sex        1        2
##          1 22552719  2545194
##          2 24334379  3420026
svytable(~ nhanes$sex + nhanes$race, Ntotal=100, design=bpdsn)
##           nhanes$race
## nhanes$sex         1         2
##          1 42.671202  4.815671
##          2 46.042217  6.470911
#svychisq(~ nhanes$sex + nhanes$race, design=bpdsn)
chisq.test(nhanes$sex, nhanes$race)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  nhanes$sex and nhanes$race
## X-squared = 5.6875, df = 1, p-value = 0.01709
svytable(~ nhanes$htn + nhanes$smk, design=bpdsn)
##           nhanes$smk
## nhanes$htn        1        2        3
##          0 17445187  8024355 11037244
##          1  7361376  5478611  3505545
#svychisq(~ nhanes$htn + nhanes$smk, design=bpdsn, statistic="Wald")
## Survey Chisq used in SUDAAN

Survey weighting univariate, bivariate, and graphical analyses

## Univariate distributions and bivariate associations using graphical analyses
svyhist(~ nhanes$sbp, design=bpdsn)

svyhist(~ log(nhanes$sbp), design=bpdsn)

svyhist(~ nhanes$bpb, bpdsn)

svyhist(~ log(nhanes$bpb), bpdsn)

svyboxplot(log(nhanes$sbp) ~ 1, bpdsn)

# svyboxplot(log(nhanes$sbp) ~ as.factor(nhanes$smk), bpdsn)

# bp.bysexrace <- svyby(~ nhanes$sbp + nhanes$dbp, ~ nhanes$sex + nhanes$race, bpdsn, svymean)
# barplot(bp.bysexrace)
# barplot(bp.bysexrace, legend=TRUE)

## change the label of x-axis and ylim
# xlabel<-c("Male, White","Female, White","Male, Black","Female, Black")
# barplot(bp.bysexrace, legend=TRUE, ylim=c(0,160),col=c("purple","violet"), names=xlabel)

#simple scatterplot with circles whose area is proportional to the sampling weight
svyplot(log(nhanes$sbp) ~ nhanes$age, bpdsn, style="bubble")

#style="transparent" plots points with opacity proportional to sampling weight 
svyplot(log(nhanes$sbp)~log(nhanes$bpb), bpdsn, style="transparent", pch=19, xlab="Blood lead", ylab="log(SBP)")

## Scatterplot smoothing
smth.sbp.bpb1<-svysmooth(log(nhanes$sbp) ~ nhanes$bpb, bpdsn, bandwidth=10)
#fit local polynomial kernel smoothing with a bandwidth=10 bpb unit 
plot(smth.sbp.bpb1)

smth.sbp.bpb2<-svysmooth(log(nhanes$sbp) ~nhanes$bpb, bpdsn, bandwidth=20)
plot(smth.sbp.bpb2)

Survey weighted regression analyses

#### Linear and mixed effect models
## First fit a linear regression
sbp.lm<-lm(log(sbp) ~ log(bpb) + age + I(age^2) + bmi + factor(race) + factor(sex) + factor(smk) + factor(educ) + factor(alc), data=nhanes)
summary(sbp.lm)
## 
## Call:
## lm(formula = log(sbp) ~ log(bpb) + age + I(age^2) + bmi + factor(race) + 
##     factor(sex) + factor(smk) + factor(educ) + factor(alc), data = nhanes)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.53077 -0.07335 -0.00539  0.06647  0.58491 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    4.504e+00  1.399e-02 321.836  < 2e-16 ***
## log(bpb)       1.127e-02  2.635e-03   4.275 1.95e-05 ***
## age            3.169e-03  5.381e-04   5.889 4.14e-09 ***
## I(age^2)       1.444e-05  5.126e-06   2.817 0.004873 ** 
## bmi            4.857e-03  2.952e-04  16.453  < 2e-16 ***
## factor(race)2  1.484e-02  3.786e-03   3.920 8.97e-05 ***
## factor(sex)2  -3.295e-02  3.777e-03  -8.722  < 2e-16 ***
## factor(smk)2  -1.427e-02  4.280e-03  -3.333 0.000864 ***
## factor(smk)3  -7.149e-04  4.331e-03  -0.165 0.868907    
## factor(educ)2  2.335e-03  3.673e-03   0.636 0.525031    
## factor(educ)3 -1.718e-02  5.452e-03  -3.151 0.001636 ** 
## factor(alc)1   4.695e-03  3.687e-03   1.273 0.202981    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1148 on 4890 degrees of freedom
##   (172 observations deleted due to missingness)
## Multiple R-squared:  0.4166, Adjusted R-squared:  0.4153 
## F-statistic: 317.5 on 11 and 4890 DF,  p-value: < 2.2e-16
sbp.lme<-lme(fixed = log(sbp) ~ log(bpb) + age + I(age^2) + bmi + factor(race) + factor(sex) + factor(smk) + factor(educ) + factor(alc), random=~1|strata, 
na.action=na.omit, data=nhanes)
summary(sbp.lme)
## Linear mixed-effects model fit by REML
##   Data: nhanes 
##         AIC       BIC   logLik
##   -7180.906 -7089.976 3604.453
## 
## Random effects:
##  Formula: ~1 | strata
##         (Intercept)  Residual
## StdDev:  0.01329355 0.1140193
## 
## Fixed effects:  log(sbp) ~ log(bpb) + age + I(age^2) + bmi + factor(race) + factor(sex) +      factor(smk) + factor(educ) + factor(alc) 
##                   Value   Std.Error   DF  t-value p-value
## (Intercept)    4.504420 0.014118879 4842 319.0352  0.0000
## log(bpb)       0.012464 0.002678746 4842   4.6528  0.0000
## age            0.002986 0.000537173 4842   5.5596  0.0000
## I(age^2)       0.000016 0.000005122 4842   3.2103  0.0013
## bmi            0.004846 0.000294231 4842  16.4707  0.0000
## factor(race)2  0.013309 0.004087288 4842   3.2562  0.0011
## factor(sex)2  -0.032535 0.003777089 4842  -8.6137  0.0000
## factor(smk)2  -0.014623 0.004267145 4842  -3.4269  0.0006
## factor(smk)3  -0.001351 0.004343337 4842  -0.3110  0.7558
## factor(educ)2  0.005259 0.003728817 4842   1.4102  0.1585
## factor(educ)3 -0.014102 0.005533291 4842  -2.5487  0.0108
## factor(alc)1   0.005399 0.003691217 4842   1.4627  0.1436
##  Correlation: 
##               (Intr) lg(bp) age    I(g^2) bmi    fctr(r)2 fctr(sx)2 fctr(sm)2
## log(bpb)      -0.127                                                         
## age           -0.690 -0.106                                                  
## I(age^2)       0.645  0.051 -0.983                                           
## bmi           -0.398  0.036 -0.222  0.222                                    
## factor(race)2 -0.042 -0.108 -0.009  0.040 -0.074                             
## factor(sex)2  -0.183  0.340 -0.059  0.050 -0.027 -0.028                      
## factor(smk)2   0.051 -0.015 -0.154  0.123 -0.023  0.057    0.192             
## factor(smk)3  -0.044 -0.185 -0.115  0.135  0.105 -0.027    0.054     0.344   
## factor(educ)2 -0.179  0.141 -0.026  0.047  0.047 -0.054   -0.034    -0.005   
## factor(educ)3 -0.107  0.172 -0.109  0.112  0.086  0.026    0.062     0.051   
## factor(alc)1  -0.188 -0.085  0.015  0.030  0.053  0.085    0.222    -0.070   
##               fctr(s)3 fctr(d)2 fctr(d)3
## log(bpb)                                
## age                                     
## I(age^2)                                
## bmi                                     
## factor(race)2                           
## factor(sex)2                            
## factor(smk)2                            
## factor(smk)3                            
## factor(educ)2  0.040                    
## factor(educ)3  0.131    0.415           
## factor(alc)1  -0.150   -0.111   -0.134  
## 
## Standardized Within-Group Residuals:
##         Min          Q1         Med          Q3         Max 
## -4.66735508 -0.64180845 -0.05163683  0.58517448  5.12186931 
## 
## Number of Observations: 4902
## Number of Groups: 49
## Let's use the survey package
# sbp.svy<-svyglm(log(nhanes$sbp) ~ log(nhanes$bpb) + nhanes$age + I(nhanes$age^2) + nhanes$bmi + factor(nhanes$race) + factor(nhanes$sex) + factor(nhanes$smk) + factor(nhanes$educ) + factor(nhanes$alc), bpdsn)
# summary(sbp.svy)
# plot(sbp.svy)

# par(mfrow=c(2,2))
# plot(sbp.lm, which=1)
# plot(sbp.lm, which=2)
# plot(sbp.svy, which=1)
# plot(sbp.svy, which=2)
# par(mfrow=c(1,1))

## compare the beta's
# summary(sbp.lm)$coef[2,]
# summary(sbp.lme)$tTable[2,]
# summary(sbp.svy)$coef[2,]

#### Logistic regression model
# htn.svy<-svyglm(nhanes$htn ~ log(nhanes$bpb) + ns(nhanes$age,df=5) + ns(nhanes$bmi,df=5) + factor(nhanes$race) + factor(nhanes$sex) + factor(nhanes$educ) + nhanes$hematoc + nhanes$chol + nhanes$packyrs + nhanes$diag_dm, family=quasibinomial(), bpdsn)
# summary(htn.svy)

7.4. Meta-analysis in R with the rmeta package

Set up objects for meta analysis

##Suppose you have the estimates from 16 cities of the effect of PM2.5 
##on Myocardial Infarction (MI) hospital admissions. 
##We want to obtain the combined effect across all the cities.
city<-1:16
coef<-c(0.00155,0.00445,-0.00597,0.00237,0.00031,-0.00035,0.00206,0.00127,
-0.00395,-0.00434,0.00241,0.00730,-0.00175,0.00117,0.00007,0.00350)

se<-c(0.013589379,0.003224905,0.002400626,0.001797566,0.001019834,0.002658267,
0.001529252,0.002388748,0.002583907,0.005885021,0.00122501,0.00597465,
0.002546777,0.002406304,0.000913846,0.00517299)

Perform meta-analysis

model<-meta.summaries(coef, se, names=city, method="random")
#method="random" estimates and adds a heterogeneity variance
summary(model)
## Random-effects meta-analysis
## Call: meta.summaries(d = coef, se = se, method = "random", names = city)
## ----------------------------------------------------
##    Effect (lower  95% upper) weights
## 1    0.00   -0.03       0.03     0.0
## 2    0.00    0.00       0.01     0.5
## 3   -0.01   -0.01       0.00     0.8
## 4    0.00    0.00       0.01     1.3
## 5    0.00    0.00       0.00     2.5
## 6    0.00   -0.01       0.00     0.7
## 7    0.00    0.00       0.01     1.6
## 8    0.00    0.00       0.01     0.8
## 9    0.00   -0.01       0.00     0.7
## 10   0.00   -0.02       0.01     0.2
## 11   0.00    0.00       0.00     2.1
## 12   0.01    0.00       0.02     0.2
## 13   0.00   -0.01       0.00     0.7
## 14   0.00    0.00       0.01     0.8
## 15   0.00    0.00       0.00     2.8
## 16   0.00   -0.01       0.01     0.2
## ----------------------------------------------------
## Summary effect: 0 95% CI ( 0,0 )
## Estimated heterogeneity variance: 1.2e-06  p= 0.176
##Plot the summary results
metaplot(coef, se)

##Get combined estimate, its standard error and RR and 95% CI
model$summary 
## [1] 0.0005330941
model$summ
## [1] 0.0005330941
model$se.summary 
## [1] 0.0005977285
model$se
## [1] 0.0005977285
exp(model$summ*10)
## [1] 1.005345
exp((model$summ-1.96*model$se)*10)
## [1] 0.9936358
exp((model$summ+1.96*model$se)*10)
## [1] 1.017193

End of the class!

Thank you so much!